## Download 2016 Canadian Census DAs with population and boundaries and do some basic recoding

library(here)
library(sf)
library(cancensus)
library(dplyr)

## CensusMapper API key for the cancensus package.
## Read it from the CM_API_KEY environment variable (the name cancensus itself
## documents) instead of pasting the secret into this file, where it is easy to
## commit by accident. When the variable is unset the key is "" exactly as
## before; a key can still be assigned here by hand for a one-off run.
apikey <- Sys.getenv("CM_API_KEY", unset = "")


## Ask cancensus to remember "." (the current working directory) as its cache
## directory; install = TRUE requests that the setting be persisted for later
## sessions.
## This stays best-effort: an error does not stop the script. The handler
## reports the actual condition message rather than assuming the only possible
## failure is "a cache path is already saved", so an unrelated problem (for
## example an unwritable settings file) is no longer mislabelled.
## NOTE(review): "." is relative, so the effective cache location depends on
## the working directory at run time -- confirm this is intended.
tryCatch(
  {
    set_cancensus_cache_path(".", install = TRUE)
    message("Cache path set successfully.")
  },
  error = function(e) {
    message("Cache path was not installed: ", conditionMessage(e))
  }
)


## Register the CensusMapper API key with cancensus (install = TRUE requests
## that it be persisted for later sessions).
## An empty key is never installed: passing "" with install = TRUE could store
## a blank key that later runs would then treat as "already set".
## Installation stays best-effort (an error does not stop the script), but the
## handler prints the real condition message instead of assuming the cause.
if (nzchar(apikey)) {
  tryCatch(
    {
      set_cancensus_api_key(apikey, install = TRUE)
      message("The API key has been set")
    },
    error = function(e) {
      message("The API key was not installed: ", conditionMessage(e))
    }
  )
} else {
  message(
    "No API key supplied in `apikey`; ",
    "relying on any key already configured for cancensus"
  )
}


## The available datasets can be listed with:
## list_census_datasets() %>% print(n = 100)

## Fetch the catalogue of vectors and regions for the 2016 Census ("CA16").
ca16vars <- list_census_vectors("CA16")
ca16regions <- list_census_regions("CA16")

## Print every vector whose label mentions "minor" or "sample"
## (case-insensitive) so the visible-minority variables can be picked out.
ca16vars %>%
  filter(grepl("minor|sample", label, ignore.case = TRUE)) %>%
  select(vector, type, label, parent_vector, details) %>%
  print(n = 200)

### A tibble: 22 × 5
##   vector      type   label                                                                               parent_vector details
##   <chr>       <fct>  <chr>                                                                               <chr>         <chr>
## 1 v_CA16_542  Total  Official language minority (number)                                                 NA            CA 2016 Cen…
## 2 v_CA16_543  Male   Official language minority (number)                                                 NA            CA 2016 Cen…
## 3 v_CA16_544  Female Official language minority (number)                                                 NA            CA 2016 Cen…
## 4 v_CA16_545  Total  Official language minority (percentage)                                             NA            CA 2016 Cen…
## 5 v_CA16_546  Male   Official language minority (percentage)                                             NA            CA 2016 Cen…
## 6 v_CA16_547  Female Official language minority (percentage)                                             NA            CA 2016 Cen…
## 7 v_CA16_3954 Total  Total - Visible minority for the population in private households - 25% sample data NA            CA 2016 Cen…
## 8 v_CA16_3955 Male   Total - Visible minority for the population in private households - 25% sample data NA            CA 2016 Cen…
## 9 v_CA16_3956 Female Total - Visible minority for the population in private households - 25% sample data NA            CA 2016 Cen…
## 10 v_CA16_3957 Total  Total visible minority population                                                   v_CA16_3954   CA 2016 Cen…
## 11 v_CA16_3958 Male   Total visible minority population                                                   v_CA16_3955   CA 2016 Cen…
## 12 v_CA16_3959 Female Total visible minority population                                                   v_CA16_3956   CA 2016 Cen…
## 13 v_CA16_3990 Total  Visible minority, n.i.e.                                                            v_CA16_3957   CA 2016 Cen…
## 14 v_CA16_3991 Male   Visible minority, n.i.e.                                                            v_CA16_3958   CA 2016 Cen…
## 15 v_CA16_3992 Female Visible minority, n.i.e.                                                            v_CA16_3959   CA 2016 Cen…
## 16 v_CA16_3993 Total  Multiple visible minorities                                                         v_CA16_3957   CA 2016 Cen…
## 17 v_CA16_3994 Male   Multiple visible minorities                                                         v_CA16_3958   CA 2016 Cen…
## 18 v_CA16_3995 Female Multiple visible minorities                                                         v_CA16_3959   CA 2016 Cen…
## 19 v_CA16_3996 Total  Not a visible minority                                                              v_CA16_3954   CA 2016 Cen…
## 20 v_CA16_3997 Male   Not a visible minority                                                              v_CA16_3955   CA 2016 Cen…
## 21 v_CA16_3998 Female Not a visible minority                                                              v_CA16_3956   CA 2016 Cen…
## 22 v_CA16_4871 Total  Only regular maintenance or minor repairs needed                                    v_CA16_4870   CA 2016 Cen…
### n.i.e means not included elsewhere

## Metadata for the visible-minority vectors used below: the 25%-sample total
## (v_CA16_3954) and the counts listed beneath it in the table above.
vm_vars <- ca16vars %>%
  select(vector, type, label, parent_vector, details) %>%
  filter(
    vector %in% c(
      "v_CA16_3954", "v_CA16_3957", "v_CA16_3990", "v_CA16_3993", "v_CA16_3996"
    )
  )

## Download dissemination-area (DA) level data for all of Canada together with
## high-resolution boundaries; geo_format = "sf" asks for an sf-class data
## frame.
census_data_16 <- get_census(
  dataset = "CA16",
  regions = list(C = "Canada"),
  level = "DA",
  vectors = c(
    ## Population, population change, density and land area
    ## (v_CA16_1 is requested as well but is not renamed below)
    "v_CA16_1", "v_CA16_401", "v_CA16_402", "v_CA16_403", "v_CA16_406",
    "v_CA16_407",
    ## Visible-minority counts and their 25%-sample total
    "v_CA16_3957", "v_CA16_3990", "v_CA16_3993", "v_CA16_3996", "v_CA16_3954"
  ),
  geo_format = "sf",
  resolution = "high"
)

## Give the downloaded columns short, consistent names (new name = old name).
## The proportion visible minority is computed from these further down.
census_data_16 <- census_data_16 %>%
  rename(
    ## Area, density and population
    da_area_kmsq_16 = `Area (sq km)`,
    da_area_kmsq2_16 = `v_CA16_407: Land area in square kilometres`,
    da_popdens_16 = `v_CA16_406: Population density per square kilometre`,
    da_pop_16 = `v_CA16_401: Population, 2016`,
    da_pop_11 = `v_CA16_402: Population, 2011`,
    da_popchg_11_16 = `v_CA16_403: Population percentage change, 2011 to 2016`,
    ## Visible-minority counts (n.i.e. means not included elsewhere)
    da_vm_pop_16 = `v_CA16_3957: Total visible minority population`,
    da_vm_pop_nie_16 = `v_CA16_3990: Visible minority, n.i.e.`,
    da_vm_pop_mult_16 = `v_CA16_3993: Multiple visible minorities`,
    da_non_vm_pop_16 = `v_CA16_3996: Not a visible minority`,
    da_vm_pop_25pct_16 = `v_CA16_3954: Total - Visible minority for the population in private households - 25% sample data`
  )

## Quick look at the renamed data and at the population columns side by side.
census_data_16 %>% summary()

select(census_data_16, Population, da_pop_16, da_vm_pop_16)

## Proportion visible minority. The denominator is the long-form (25% sample)
## total, because that is the population the visible-minority counts are
## based on.
census_data_16 <- mutate(
  census_data_16,
  da_prop_vm_16 = da_vm_pop_16 / da_vm_pop_25pct_16
)
census_data_16$da_prop_vm_16 %>% summary()

## Note that some are missing. I don't know what this means when it comes to
## our data; we will discover that later. Also, a few places have more than
## 100%, almost surely because of hiding true numbers for confidentiality
## (notice that they all end in 5 or 0?).

## Count the DAs whose proportion exceeds 1.
table(census_data_16[["da_prop_vm_16"]] > 1)

## Show the counts behind those rows.
census_data_16 %>%
  select(da_pop_16, da_vm_pop_16, da_non_vm_pop_16, da_prop_vm_16) %>%
  filter(da_prop_vm_16 > 1)

## Just recode down to 1 for ease of use later; missing values stay missing.
## NOTE(review): a zero denominator with a positive count gives Inf, which is
## also recoded to 1 here -- confirm that is intended.
census_data_16 <- mutate(
  census_data_16,
  da_prop_vm_16 = ifelse(da_prop_vm_16 > 1, 1, da_prop_vm_16)
)

## Write the prepared data to Data/CensusData/2016_Data under the project root.
## save() does not create missing directories, so make sure the folder exists
## first; otherwise the script would fail here, after the large download.
dir.create(here("Data/CensusData/2016_Data"), recursive = TRUE, showWarnings = FALSE)
save(census_data_16, file = here("Data/CensusData/2016_Data", "census_data_16.rda"))

## Check to see that this looks reasonable
## library(ggplot2)
## ## Some issues finding proj.db for projections
## ## this next is not perfectly portable
## Sys.setenv(PROJ_LIB = "/opt/homebrew/Cellar/proj/9.4.0/share/proj")
## Sys.getenv("PROJ_LIB")
## ## [1] "/opt/homebrew/Cellar/proj/9.4.0/share/proj"
## ggplot() +
##   geom_sf(data = census_data_16) +
##   theme_minimal()
